# Long-format table of group metrics for the rows of `df` where
# `fund.on.data.history.` is FALSE. `share.data.` is kept as an id column
# so the plots below can colour by it; rows containing any NA are dropped.
pdata_sharing <- df %>%
  filter(!fund.on.data.history.) %>%
  select(
    c(.run.number., .step., share.data.),
    mean.grants.groups:sum..total.primary.publications..of.groups
  ) %>%
  pivot_longer(
    cols = -c(.run.number., .step., share.data.),
    names_to = "name",    # metric column name (pivot_longer default)
    values_to = "value"   # metric value (pivot_longer default)
  ) %>%
  drop_na()

# Smoothed trajectory of every metric whose name contains "gini", plotted
# against `.step.` and coloured by `share.data.`. Each metric gets its own
# panel with an independent y axis.
pdata_sharing %>%
  filter(str_detect(string = name, pattern = "gini")) %>%
  ggplot(mapping = aes(x = .step., y = value, colour = share.data.)) +
  geom_smooth() +
  facet_wrap(facets = vars(name), scales = "free_y", nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Inequality is lower when data is shared.

# Same "gini" metrics as raw lines, one line per `.run.number.`, coloured
# by `share.data.`. The static plot is stored in `p` and then rendered as
# an interactive plotly widget.
p <- ggplot(
  data = filter(pdata_sharing, str_detect(name, "gini")),
  mapping = aes(
    x = .step.,
    y = value,
    colour = share.data.,
    group = .run.number.
  )
) +
  geom_line() +
  facet_wrap(facets = vars(name), nrow = 2)
plotly::ggplotly(p)
# data sharing with funding reward
# Long-format table of group metrics for the rows of `df` where
# `share.data.` is TRUE. `fund.on.data.history.` is kept as an id column
# so the plots below can colour by it; rows containing any NA are dropped.
data_funding <- df %>%
  filter(share.data.) %>%
  select(
    c(.run.number., .step., fund.on.data.history.),
    mean.grants.groups:sum..total.primary.publications..of.groups
  ) %>%
  pivot_longer(
    cols = -c(.run.number., .step., fund.on.data.history.),
    names_to = "name",    # metric column name (pivot_longer default)
    values_to = "value"   # metric value (pivot_longer default)
  ) %>%
  drop_na()

# Smoothed trajectory of every metric whose name contains "gini", plotted
# against `.step.` and coloured by `fund.on.data.history.`. Each metric
# gets its own panel with an independent y axis.
data_funding %>%
  filter(str_detect(string = name, pattern = "gini")) %>%
  ggplot(
    mapping = aes(x = .step., y = value, colour = fund.on.data.history.)
  ) +
  geom_smooth() +
  facet_wrap(facets = vars(name), scales = "free_y", nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Raw "gini" metric lines, one per `.run.number.`, coloured by
# `fund.on.data.history.`. The static plot is stored in `p` (overwriting
# any earlier `p`) and then rendered as an interactive plotly widget.
p <- ggplot(
  data = filter(data_funding, str_detect(name, "gini")),
  mapping = aes(
    x = .step.,
    y = value,
    colour = fund.on.data.history.,
    group = .run.number.
  )
) +
  geom_line() +
  facet_wrap(facets = vars(name), nrow = 2)
plotly::ggplotly(p)
# compare three
# Long-format table of group metrics for all runs, labelled with the
# experimental condition derived from the two logical switches.
#
# Built in one left-assigned pipeline so the statement can be re-run on its
# own: the earlier two-stage version overwrote `comparison` after dropping
# the switch columns, so re-running the second stage alone failed.
#
# NOTE(review): the metric range here ends at
# `sum..total.datasets..of.groups`, while the other long tables in this file
# end at `sum..total.primary.publications..of.groups`. Later filters on
# "total\\.primar" rely on the publications column lying inside this range.
# Confirm against the column order of `df`.
comparison <- df %>%
  select(.run.number., .step., fund.on.data.history., share.data.,
         mean.grants.groups:sum..total.datasets..of.groups) %>%
  pivot_longer(
    -c(.run.number., .step., fund.on.data.history., share.data.)
  ) %>%
  drop_na() %>%
  mutate(experiment = case_when(
    !share.data. & !fund.on.data.history. ~ "no sharing",
    share.data. & !fund.on.data.history. ~ "only sharing",
    share.data. & fund.on.data.history. ~ "share and reward",
    # Any other combination gets NA and is removed by the final drop_na().
    TRUE ~ NA_character_
  )) %>%
  select(-share.data., -fund.on.data.history.) %>%
  drop_na()

# Smoothed trajectory of every metric whose name contains "gini", plotted
# against `.step.` and coloured by `experiment`. Each metric gets its own
# panel with an independent y axis.
comparison %>%
  filter(str_detect(string = name, pattern = "gini")) %>%
  ggplot(mapping = aes(x = .step., y = value, colour = experiment)) +
  geom_smooth() +
  facet_wrap(facets = vars(name), scales = "free_y", nrow = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Almost no difference. Possible explanation: publications and datasets are closely linked (runs with more publications also have more datasets). Those that are successful are also those that share (because one funder demands it). Chance/error also still has a large influence. We could play with the number of funders and the share of funders that mandate data sharing.

Next thing to look at: play with the rate of data vs. publication history, and display the fraction of data grants and normal grants.

Q on experiments

Does funding based on history lead to higher concentration? I.e., do groups that got grants with data sharing tend to get more grants of the same sort?

numbers of datasets

# Per-run lines (grouped by `.run.number.`) of every metric whose name
# contains "count", excluding the "no sharing" experiment and coloured by
# `experiment`.
comparison %>%
  filter(
    str_detect(string = name, pattern = "count") &
      experiment != "no sharing"
  ) %>%
  ggplot(
    mapping = aes(
      x = .step.,
      y = value,
      colour = experiment,
      group = .run.number.
    )
  ) +
  geom_line()

Total datasets

# Notched boxplots of the "total.datasets" metric at `.step.` 500, one box
# per experiment; the "no sharing" experiment is excluded.
comparison %>%
  filter(
    experiment != "no sharing" &
      str_detect(string = name, pattern = "total\\.datasets") &
      .step. == 500
  ) %>%
  ggplot(mapping = aes(x = experiment, y = value, fill = experiment)) +
  geom_boxplot(notch = TRUE, show.legend = FALSE) +
  labs(x = NULL, y = "Total # of datasets produced")

numbers of publications

# Notched boxplots of the metric matching "total.primar" at `.step.` 500,
# one box per experiment (all experiments included).
comparison %>%
  filter(
    str_detect(string = name, pattern = "total\\.primar") &
      .step. == 500
  ) %>%
  ggplot(mapping = aes(x = experiment, y = value, fill = experiment)) +
  geom_boxplot(notch = TRUE, show.legend = FALSE) +
  labs(x = NULL, y = "Total # of publications produced")